home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The World of Computer Software
/
The World of Computer Software.iso
/
tags18.zip
/
ASMTAG.E
< prev
next >
Wrap
Text File
|
1992-03-29
|
26KB
|
776 lines
/*
EPSHeader
File: asmtag.c
Author: J. Kercheval
Created: Sun, 07/14/1991 17:25:26
*/
/*
EPSRevision History
J. Kercheval Sun, 07/14/1991 20:25:59 creation
J. Kercheval Mon, 07/15/1991 22:47:30 finish finite state machine parser
J. Kercheval Wed, 07/17/1991 21:35:43 add IsMember() and get_token()
J. Kercheval Thu, 07/18/1991 19:57:34 add flags checking
J. Kercheval Sun, 07/21/1991 15:58:56 add comment block support
J. Kercheval Sat, 07/27/1991 21:16:53 remove public post process support
J. Kercheval Sat, 07/27/1991 22:50:49 performance considerations (+10%)
J. Kercheval Sat, 08/10/1991 18:14:46 Speed up IsMember()
J. Kercheval Fri, 09/13/1991 01:17:05 add when_loading() to remap def_srch_case_map[]
J. Kercheval Thu, 10/03/1991 12:27:37 fix logic outputting local labels
J. Kercheval Sat, 10/05/1991 14:06:33 add ASMTagWant defines
*/
/*
* This file implements tagging for .ASM and .INC files which contain 80x86
* assembler in the MASM/TASM syntax. This file defines no new commands and
* is intended to work with the tags package included with V5.0 of Epsilon.
* There is no problem using modified tags packages providing calls are made
* to tags_suffix_???() routines in the same way Epsilon does this and that
* an output routine add_tag() is used. All that should be required is to
* compile and load this file and this module will be used transparently to
* you.
*
* This module implements tagging for UNION, STRUC, MACRO, PROC, LABEL
* keywords as well as for implicit labels (label:) and for data definitions
* (ie. equ, =, dq, dw, db, etc....). The performance cost on a per tag
* basis is negligible, but since more tagging is done, you should expect a
* practical 10%-20% performance hit on a per file basis. This tagger is not
* intended to do all of your work for you but is designed to be used in
* conjunction with the tags generator I have developed and is now available.
* This file implements the same semantic parser as is found in that
* executable. Use the executable in your make file for very fast and
* updated tags. If you have problems finding it, contact me and I can point
* the way...
*
* There is defined at the end of this module a when_loading() function which
* alters the default search case map to allow *correct* (or at least
* consistent) sorting with sort routines external to Epsilon. In particular,
* to produce the same sort order as any UNIX, VMS or HP style sort or with
* the tags generator this module is supposed to coexist with this mapping
* must be done. You should see no difference in the location of sorted
* buffers except for lines starting with ^, [, \, ] and _.
*
* This code is dedicated to the public domain with the caveat that Lugaru is
* welcome to use this within their distribution source code which is
* supplied with Epsilon.
*
* Good Tagging,
*
* jbk@wrq.com
*
* John Kercheval
* 127 NW Bowdoin Pl #105
* Seattle, WA 98107-4960
* August 10, 1991
*/
#include <eel.h>
/* Supply a minimal boolean vocabulary (BOOLEAN, TRUE, FALSE) unless BOOLEAN
 * has already been defined before this point. */
#ifndef BOOLEAN
#define BOOLEAN int
#define TRUE 1
#define FALSE 0
#endif
/* Compile-time switches selecting which kinds of tokens are tagged. Set a
 * switch to TRUE to emit tags of that kind, or to FALSE to suppress them.
 * The first five correspond to the PROC, MACRO, LABEL, STRUC and UNION
 * keywords; ASMTagWantDefine covers data/equate definitions (equ, =, db,
 * dw, ...). ASMTagWantLabel is also consulted for implicit "name:" labels.
 */
#define ASMTagWantProc TRUE
#define ASMTagWantMacro TRUE
#define ASMTagWantLabel TRUE
#define ASMTagWantStruc TRUE
#define ASMTagWantUnion TRUE
#define ASMTagWantDefine TRUE
/*
* The finite state machine allows the following interesting paths
*
* 1 - Discard, Parse1, Symbol1
* 2 - Discard, Parse1, Parse2, Symbol2
* 3 - Discard, Parse1, Parse2, Define
*
* All the important cases follow one of these paths according to MASM/TASM
* syntax. The Exit state is for finish-up routine calls. Paths not covered
* here are simple error paths and probably result from syntax errors.
*
* enum state { Discard, Parse1, Parse2, Symbol1, Symbol2, Define, Exit };
*/
/*
* Emulate an enumerated type for the state machine: each state is a plain
* integer constant and State is an alias for int.
*/
#define Discard 0
#define Parse1 1
#define Parse2 2
#define Symbol1 3
#define Symbol2 4
#define Define 5
#define Exit 6
typedef int State;
/* the character that starts a comment running to the end of the line */
#define COMMENT_CHAR ';'
/* width in bytes (terminator included) of each entry in the symbol tables */
#define SYMBOL_SIZE 15
/*----------------------------------------------------------------------------
*
* The symbol lists represent all the symbols we are interested in either
* obtaining or ignoring. The first element of each of these symbol lists is
* a string containing all the first characters within the symbol list. This
* allows faster rejection for IsMember() which is called often. Each list
* is terminated by an empty string entry.
*
---------------------------------------------------------------------------*/
/* symbols which are not significant for this parser; the tokenizer skips
* over them */
char ASM_NOP_Sym[][SYMBOL_SIZE] =
{
"cpbfnwo", /* list of starting characters of the symbols below */
"c", /* C language declaration */
"pascal", /* PASCAL language declaration */
"basic", /* BASIC language declaration */
"fortran", /* FORTRAN language declaration */
"prolog", /* PROLOG language declaration */
"nolanguage", /* generic language declaration */
"windows", /* WINDOWS exit and entry modifier */
"oddnear", /* overlay modifier */
"oddfar", /* overlay modifier */
"normal", /* normal procedure entry/exit code */
"\0" /* empty entry terminates the list */
};
/* symbols which begin a comment block; same layout as the other symbol
* lists (first entry holds the starting characters, empty entry ends it) */
char ASM_comment_block[][SYMBOL_SIZE] =
{
"c", /* list of starting characters of the symbols below */
"comment", /* begin comment block, next non-white character is the
* delimiter */
"\0" /* empty entry terminates the list */
};
/* create the function for determining if a character is a delimiter */
/* NOTE(review): c is used directly as the table index; this assumes
* character values are in the range 0..255 - confirm for the char type */
#define IsDelim(c) ( _ASM_delim_table[c] )
/* the indexed table for delimiter character lookup */
BOOLEAN _ASM_delim_table[256];
/* valid delimiters for this syntax */
char ASM_delim[] = " \t\n;:=.,\"()<>[]*-+/";
/* create the function for determining if a character is a whitespace */
#define IsWhite(c) ( _ASM_white_table[c] )
/* the indexed table for white space character lookup */
BOOLEAN _ASM_white_table[256];
/* whitespace characters */
char ASM_white[] = " \t\v\f";
/* symbols which are both delimiters and special tokens; these are
special tokens only when found at the beginning of a string of
1 or more delimiters */
char ASM_delim_Sym[] = "=:";
/* symbols which fit into the Define state and represent a tagged symbol */
/* state Define depends on the token ":" being at index 1 in this list,
* because index 1 is treated as a label rather than as a definition */
char ASM_def[][SYMBOL_SIZE] =
{
":e=cd", /* list of starting characters of the symbols below */
":", /* local labels */
"equ", /* equivalence */
"=", /* equivalence */
"catstr", /* concatenated and named strings */
"db", /* named byte data definition */
"dw", /* named word data definition */
"dd", /* named double word data definition */
"dp", /* named 6 byte far pointer data area definition */
"df", /* named 6 byte far pointer definition */
"dq", /* named quad word data definition */
"dt", /* named 10 byte data area */
"\0" /* empty entry terminates the list */
};
/* symbols which fit into the Symbol state and represent a tagged symbol */
/* the position of each keyword in this list (1..5) is the index that
* ASMSymbolWanted() maps onto the ASMTagWant switches, so the order here
* must not be changed independently of that function */
char ASM_sym[][SYMBOL_SIZE] =
{
"pmlsu", /* list of starting characters of the symbols below */
"proc", /* procedures */
"macro", /* macros */
"label", /* local labels */
"struc", /* structures */
"union", /* unions */
"\0" /* empty entry terminates the list */
};
/*----------------------------------------------------------------------------
 *
 * ASMParserInit() rebuilds the two character-class lookup tables used by the
 * tokenizer.  Each table holds one BOOLEAN slot per character code 0..255;
 * a slot is TRUE when that character belongs to the corresponding set
 * (ASM_delim or ASM_white).  The NUL character is additionally flagged as
 * a delimiter.
 *
 ---------------------------------------------------------------------------*/
ASMParserInit()
{
    int code;
    char *member;

    /* start with every character classed as neither delimiter nor white */
    code = 0;
    while (code < 256) {
        _ASM_white_table[code] = FALSE;
        _ASM_delim_table[code] = FALSE;
        code++;
    }

    /* flag each whitespace character */
    member = ASM_white;
    while (*member) {
        _ASM_white_table[*member] = TRUE;
        member++;
    }

    /* flag each delimiter character ... */
    member = ASM_delim;
    while (*member) {
        _ASM_delim_table[*member] = TRUE;
        member++;
    }

    /* ... and the string terminator, so that scanning for the end of a
     * token always stops at the end of the line */
    _ASM_delim_table['\0'] = TRUE;
}
/*----------------------------------------------------------------------------
 *
 * strchr() - local stand-in for the standard string library function of
 * the same name.
 *
 * Returns a pointer to the first occurrence of the character c in the
 * string s, or NULL when c does not occur.  The terminating NUL counts as
 * part of the string, so searching for '\0' yields a pointer to the
 * terminator.
 *
 ---------------------------------------------------------------------------*/
char *strchr(s, c)
char *s;
char c;
{
    char *scan;

    for (scan = s; ; scan++) {
        /* test for a match first so that the terminator itself can match */
        if (*scan == c)
            return scan;
        /* ran off the end of the string without finding c */
        if (!*scan)
            return NULL;
    }
}
/*----------------------------------------------------------------------------
 *
 * ASMSymbolWanted() reports whether tags should be produced for the keyword
 * found at position `index` of the ASM_sym token list.  The answer is the
 * value of the matching ASMTagWant define:
 *
 *      Index   Symbol    Flag
 *      -----   -------   ---------------
 *        1     "proc"    ASMTagWantProc
 *        2     "macro"   ASMTagWantMacro
 *        3     "label"   ASMTagWantLabel
 *        4     "struc"   ASMTagWantStruc
 *        5     "union"   ASMTagWantUnion
 *
 * Any other index yields FALSE.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN ASMSymbolWanted(index)
int index;
{
    if (index == 1)
        return ASMTagWantProc;

    if (index == 2)
        return ASMTagWantMacro;

    if (index == 3)
        return ASMTagWantLabel;

    if (index == 4)
        return ASMTagWantStruc;

    if (index == 5)
        return ASMTagWantUnion;

    /* not a position of the keyword list */
    return FALSE;
}
/*----------------------------------------------------------------------------
 *
 * ASMIsMember() checks whether `token` is one of the symbols in
 * `token_list`, comparing without regard to case.
 *
 * token_list - symbol list; entry 0 holds the set of first characters of
 *              all symbols in the list, entries 1.. are the symbols, and an
 *              empty entry terminates the list
 * token      - the token to look up
 * index      - on a TRUE return, receives the position (1-based) of the
 *              matching symbol in token_list; its value is not meaningful
 *              when FALSE is returned
 *
 * Returns TRUE if the token is a member of the list, FALSE otherwise.
 *
 * The caller's case_fold setting is saved on entry and restored on every
 * exit path, including the quick rejection.  (Previously the quick
 * rejection returned with case_fold still forced to 1.)
 *
 ---------------------------------------------------------------------------*/
BOOLEAN ASMIsMember(token_list, token, index)
char token_list[][SYMBOL_SIZE];
char *token;
int *index;
{
    int old_case_fold = case_fold;
    BOOLEAN found = FALSE;

    /* use non case sensitive string compare */
    case_fold = 1;

    /* quick rejection: only walk the list when the token's first character
     * starts at least one of the symbols */
    if (strchr(token_list[0], tolower(token[0]))) {

        /* march through the array until membership is determined; the
         * empty entry marks the end of the list */
        for (*index = 1; *token_list[*index]; (*index)++) {
            if (!strfcmp(token, token_list[*index])) {
                /* *index is left at the position of the match */
                found = TRUE;
                break;
            }
        }
    }

    /* single exit point: always put the caller's setting back */
    case_fold = old_case_fold;
    return found;
}
/*----------------------------------------------------------------------------
 *
 * ASM_get_token() extracts the next significant token from the line that
 * *lptr points into.
 *
 * lptr  - address of the scan pointer; it is advanced past whatever was
 *         consumed
 * token - receives the NUL terminated token text
 *
 * Returns FALSE when the end of the line, or a comment character, is the
 * first thing found after optional whitespace; *lptr is then left on that
 * character.  Returns TRUE when a token was stored.
 *
 * A character from ASM_delim_Sym forms a one character token by itself.
 * Anything else is collected up to (not including) the next delimiter; if
 * the scan already sits on a delimiter, the collected token is empty and
 * the pointer is not advanced.  Tokens that are members of ASM_NOP_Sym are
 * skipped and the scan continues with the token after them.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN ASM_get_token(lptr, token)
char **lptr;
char *token;
{
    char *cursor;               /* working copy of the scan pointer */
    char *first;                /* first character of the token */
    int length;                 /* number of characters in the token */
    int ignored;                /* index result we have no use for */

    for (;;) {

        /* step over leading whitespace */
        cursor = *lptr;
        while (IsWhite(*cursor))
            cursor++;
        *lptr = cursor;

        /* nothing more to parse: end of line or start of a comment */
        if (*cursor == '\0' || *cursor == COMMENT_CHAR)
            return FALSE;

        if (strchr(ASM_delim_Sym, *cursor)) {
            /* a delimiter that is a token in its own right */
            token[0] = *cursor;
            token[1] = '\0';
            *lptr = cursor + 1;
        }
        else {
            /* ordinary token: everything up to the next delimiter */
            first = cursor;
            while (!IsDelim(*cursor))
                cursor++;
            length = cursor - first;
            strncpy(token, first, length);
            token[length] = '\0';
            *lptr = cursor;
        }

        /* hand the token back unless it is one of the ignored symbols */
        if (!ASMIsMember(ASM_NOP_Sym, token, &ignored))
            return TRUE;
    }
}
/*----------------------------------------------------------------------------
 *
 * getline() obtains the next line of the buffer named inbuf.
 *
 * inbuf - name of the buffer to read from
 * line  - receives the text from the buffer's point up to the position
 *         reached by nl_forward()
 *
 * Returns TRUE when text was copied into line, and FALSE when nl_forward()
 * did not move point (nothing left to read); line is not touched in that
 * case.  Point in inbuf is left wherever nl_forward() put it.
 *
 * The buffer that was current on entry is made current again on every
 * return path, and point is sampled only after inbuf has been selected, so
 * the start and end positions always belong to the same buffer.
 *
 * NOTE(review): no size is passed for line, so the copy cannot be bounded
 * here; the caller's buffer must be large enough for the longest line.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN getline(inbuf, line)
char *inbuf;
char *line;
{
    char *oldbuf = bufname;
    int start;
    BOOLEAN got_line;

    /* select the input buffer first, then note where the line begins */
    bufname = inbuf;
    start = point;

    nl_forward();

    /* point did not move: there is no further line */
    got_line = (start != point);
    if (got_line) {
        grab(start, point, line);
    }

    /* always hand the original buffer back to the caller */
    bufname = oldbuf;
    return got_line;
}
/*----------------------------------------------------------------------------
 *
 * output_tag() records one tag by forwarding it to add_tag().
 *
 * Only `symbol` (the tag text) and `char_number` (the character offset
 * passed on as the tag position) are used at present.  The remaining
 * parameters - outbuf, line, infname and line_number - are accepted but
 * ignored; they are kept so that other output formats or extra output
 * information can be added here without touching the callers.
 *
 ---------------------------------------------------------------------------*/
output_tag(outbuf, line, symbol, infname, line_number, char_number)
char *outbuf;
char *line;
char *symbol;
char *infname;
int line_number;
int char_number;
{
    /* add_tag() is the output routine of the tags package (tags.e) */
    add_tag(symbol, char_number);
}
/*----------------------------------------------------------------------------
*
* ASMTags() tags an input stream assuming input format of ASM 80x86 format
* in MASM/TASM syntax
*
* inbuf   - name of the buffer to read lines from (via getline())
* infname - name of the file being tagged; only passed on to output_tag()
* outbuf  - name of the output buffer; only passed on to output_tag()
*
* Lines are read one at a time and run through the finite state machine
* (Discard, Parse1, Parse2, Symbol1, Symbol2, Define, Exit). Each tag is
* reported through output_tag() together with the character offset of the
* first character of the tagged symbol. That offset is accumulated from the
* lengths of the lines read so far, starting at 0 for the first line read.
*
* NOTE(review): the offsets therefore presume that reading starts at the
* beginning of the buffer - confirm that callers position point there.
* NOTE(review): line[] holds 256 bytes and getline() is given no size, so
* input lines are presumably expected to be shorter than that - confirm.
*
* The caller's current buffer, point and mark are saved on entry and put
* back in the Exit state.
*
---------------------------------------------------------------------------*/
ASMTags(inbuf, infname, outbuf)
char *inbuf;
char *infname;
char *outbuf;
{
State state; /* the current state of the parser */
char line[256]; /* the current input line */
char cur_token[256]; /* the current token */
char prev_token[256]; /* the previous token */
char *lptr; /* pointer into line for token parser */
char *prev_lptr; /* pointer into line for previous token */
int line_number; /* the current line in the file */
int line_length; /* the length of the current line */
int char_number; /* offset of the start of the current line */
int symbol_index; /* the index into the token list of the
* symbol */
char *oldbuf = bufname;
spot oldpoint = alloc_spot();
spot oldmark = alloc_spot();
/* save current buffer state */
*oldpoint = point;
*oldmark = mark;
/* init the engine */
ASMParserInit();
cur_token[0] = '\0';
prev_token[0] = '\0';
state = Discard;
line_number = 0;
line_length = 0;
char_number = 0;
lptr = prev_lptr = (char *) NULL;
/* run the state machine; the only way out is the return in state Exit */
for (;;) {
switch (state) {
case Discard: /* current line is not valid */
/* fetch the next line, or head for Exit when there is none */
if (getline(inbuf, line)) {
lptr = line;
/* increment counters */
line_number++;
/* char_number increments by length of previous line */
char_number += line_length;
/* line length */
line_length = strlen(line);
state = Parse1;
}
else {
state = Exit;
}
break;
case Parse1: /* parsing for first *special* token */
/* get the next valid token */
if (!ASM_get_token(&lptr, cur_token)) {
/* if no token left or a comment as first non white space
* char in remainder of line */
state = Discard;
}
else {
/* move the cur_token to prev_token */
strcpy(prev_token, cur_token);
/* check for membership in the tagging symbol club */
if (ASMIsMember(ASM_sym, cur_token, &symbol_index)) {
state = Symbol1;
}
else {
/* check if comment block */
if (ASMIsMember(ASM_comment_block,
cur_token, &symbol_index)) {
/* get the next non white character, this makes
* the assumption that the delimiter character is
* on the same line as the comment symbol. If the
* delimiter character is not on the current line
* then parsing continues normally on the next
* line. */
while (IsWhite(*lptr)) {
lptr++;
}
if (*lptr) {
/* this is the delimiter character, store it
* and move lptr past it */
*cur_token = *lptr;
lptr++;
/* move over comment block, remembering to
* update line info as we go */
while (*lptr != *cur_token) {
/* get a new line if end of line */
if (!*lptr) {
if (!getline(inbuf, line)) {
/* no more input: *lptr is NUL here, so
* copying it into the delimiter makes
* the loop condition fail and ends the
* scan */
*cur_token = *lptr;
}
else {
lptr = line;
/* increment counters */
line_number++;
/* char_number increments by
* length of previous line */
char_number += line_length;
/* line length */
line_length = strlen(line);
}
}
else {
lptr++;
}
}
}
/* the rest of the line holding the closing delimiter
* is not parsed */
state = Discard;
}
else {
/* nothing special, parse the next symbol */
state = Parse2;
}
}
}
break;
case Parse2: /* parsing for second *special* token */
/* save the previous position; lptr is just past the first
* token here, which the Symbol2 and Define states rely on */
prev_lptr = lptr;
/* get the next token */
if (!ASM_get_token(&lptr, cur_token)) {
/* no token left, reset machine */
state = Discard;
}
else {
if (ASMIsMember(ASM_sym, cur_token, &symbol_index)) {
/* found a major symbol */
state = Symbol2;
}
else {
if (ASMIsMember(ASM_def, cur_token, &symbol_index)) {
/* found a defining token */
state = Define;
}
else {
state = Discard;
}
}
}
break;
case Symbol1: /* next token, ignore if no token found */
/* get the next symbol and output it */
if (ASM_get_token(&lptr, cur_token)) {
if (ASMSymbolWanted(symbol_index)) {
/* lptr is just past cur_token, so subtracting the token
* length gives the offset of its first character */
output_tag(outbuf, line, cur_token, infname,
line_number, char_number +
lptr - line -
strlen(cur_token));
}
}
/* reset machine */
state = Discard;
break;
case Symbol2: /* previous token was the wanted symbol */
/* the previous token is the symbol of interest */
if (ASMSymbolWanted(symbol_index)) {
/* prev_lptr is just past prev_token (saved in Parse2) */
output_tag(outbuf, line, prev_token, infname,
line_number, char_number +
prev_lptr - line -
strlen(prev_token));
}
/* reset machine */
state = Discard;
break;
case Define: /* previous token was the wanted symbol */
/* the previous token is the symbol of interest; index 1 of
* ASM_def is ":" and is governed by the label switch, every
* other entry by the define switch */
if ((ASMTagWantDefine && symbol_index != 1) ||
(ASMTagWantLabel && symbol_index == 1)) {
output_tag(outbuf, line, prev_token, infname,
line_number, char_number +
prev_lptr - line -
strlen(prev_token));
}
/* reset machine */
state = Discard;
break;
case Exit: /* clean it up */
/* restore original location */
bufname = oldbuf;
point = *oldpoint;
mark = *oldmark;
free_spot(oldpoint);
free_spot(oldmark);
return;
break;
default: /* not reached */
break;
}
}
}
/*----------------------------------------------------------------------------
*
* tag_suffix_asm() and tag_suffix_inc() are recognized procedure names
* to the tags package in Epsilon and will be called automatically when
* tagging needs to happen for these extensions. tag_suffix_asm() is a
* replacement for the routine of the same name defined in tags.e and
* tag_suffix_inc() is new.
*
---------------------------------------------------------------------------*/
/* tag the current buffer as assembler source */
tag_suffix_asm()
{
/* the third parameter, the output buffer name, is not actually used by
* anyone but is left here for a time when this information may be
* needed. The current algorithm is to let the function add_tag() decide
* the buffer name to send the output to. As a little more than
* coincidence, the name used here is the same used in add_tag() defined
* in tags.e */
ASMTags(bufname, filename, "-tags");
}
/* tag_suffix_inc() tags .INC files; they are handled exactly like .ASM
* files, so this simply forwards to tag_suffix_asm() */
tag_suffix_inc()
{
tag_suffix_asm();
}
/* NOTE: everything down to the matching #endif is compiled only when the
* preprocessor symbol foo is defined */
#ifdef foo
/* rebuild the default character maps */
when_loading()
{
/* UCLC(up, low) registers one upper/lower case pair: it sets the character
* classes of both characters, maps up to low in the search case map and
* maps each character to the other in the case map */
#define UCLC(up, low) _def_char_class[low] = C_LOWER, \
_def_char_class[up] = C_UPPER, \
_def_srch_case_map[up] = low, \
_def_case_map[low] = up, \
_def_case_map[up] = low
int i, j;
/* start from identity maps: every character maps to itself */
for (i = 0; i < 256; i++)
_def_case_map[i] = _def_srch_case_map[i] = i;
/* the 26 unaccented letter pairs */
for (i = 'A', j = 'a'; i <= 'Z'; i++, j++)
UCLC(i, j);
/* character codes 131..153 and 160..163 are classed as lower case */
/* NOTE(review): presumably the accented letters of the IBM PC character
* set - confirm against the code page in use */
for (i = 131; i < 154; i++)
_def_char_class[i] = C_LOWER;
for (i = 160; i < 164; i++)
_def_char_class[i] = C_LOWER;
/* accented letters that have both an upper and a lower case form */
/* NOTE(review): these literals were presumably single byte extended
* characters in the original file; verify the file encoding before
* enabling this code */
UCLC('Ç', 'ç');
UCLC('Ä', 'ä');
UCLC('Å', 'å');
UCLC('É', 'é');
UCLC('Æ', 'æ');
UCLC('Ö', 'ö');
UCLC('Ü', 'ü');
UCLC('Ñ', 'ñ');
}
#endif